rm(list = ls())
# Use pacman to check whether packages are installed, if not load
if (!require("pacman")) install.packages("pacman")
library(pacman)
# Unload all package to begin in a session with only base packages
pacman::p_unload("all")
# Install packages
pacman::p_load(
here,
skimr,
survival,
survminer,
rms,
cmprsk,
riskRegression,
mstate,
pseudo,
pec,
plotrix,
knitr,
splines,
kableExtra,
flextable,
gtsummary,
boot,
tidyverse,
rsample,
gridExtra,
webshot,
patchwork,
survival,
ggsci,
cowplot,
scales,
patchwork,
labelled,
glue,
dcurves,
broom,
downlit,
xml2,
gghalves,
devtools,
htmltools,
gghalves,
ggtext,
DiagrammeR,
gt,
janitor,
VIM,
PerformanceAnalytics,
mice,
rms,
naniar,
DescTools,
gtools,
ggExtra,
furrr,
future,
ggmice,
parallel,
tictoc,
rio,
tidymodels
)
if (!require("impstep")) remotes::install_github("bgravesteijn/impstep", force = TRUE)
if (!require("smplot2")) devtools::install_github('smin95/smplot2', force = TRUE)
library(impstep)6 - Validacion de EcuacioneS KFRE Recalibradas en datos imputados
1 Code to reproduce results of the manuscript ‘Kidney Failure Prediction: Multicenter External Validation of the KFRE Model in Patients with CKD Stages 3-4 in Peru’
1.1 Introduction
This document presents the code and results of the main analysis shown in the article.
1.2 Setup
1.2.0.1 Funciones
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))
source(here("Code", "source", "tidy_pool.R"))
source(here("Code", "source", "process_imp_cal_plot2.R"))
source(here("Code", "source", "print_equation.R")) 1.2.1 Importar datos
Las ecuaciones originales se muestran a continuacion:
eq_kfre_original2y <- data.frame(
vars = c("age", "male", "eGFR", "logACR"),
coefs = c(-0.2201, 0.2467, -0.5567, 0.4510),
scale = c(10, 1, 5, 1),
center = c(7.036, 0.5642, 7.222, 5.137)
)
st02y <- 0.9832
eq_kfre_original5y <- data.frame(
vars = c("age", "male", "eGFR", "logACR"),
coefs = c(-0.2201, 0.2467, -0.5567, 0.4510),
scale = c(10, 1, 5, 1),
center = c(7.036, 0.5642, 7.222, 5.137)
)
st05y <- 0.9365- A 2 anios:
print_equation(eq_kfre_original2y, st02y)\[1 - 0.9832^{exp(- 0.2201 \times (age / 10 - 7.036) + 0.2467 \times (male / 1 - 0.5642) - 0.5567 \times (eGFR / 5 - 7.222) + 0.451 \times (logACR / 1 - 5.137))}\]
- A 5 anios:
print_equation(eq_kfre_original2y, st02y)\[1 - 0.9832^{exp(- 0.2201 \times (age / 10 - 7.036) + 0.2467 \times (male / 1 - 0.5642) - 0.5567 \times (eGFR / 5 - 7.222) + 0.451 \times (logACR / 1 - 5.137))}\]
Las nuevas sobrevidas basales y los factores de recalibracion de los coeficientes de las ecuaciones recalibradas, dependiendo del metodo usado, se muestran a continuacion
recal_loads <- import(here("Data", "Tidy", "equations", "recal_loads.rds"))
recal_loads |>
kbl() |>
kable_classic()| metodo | year | sobrev_recal | fc_coef |
|---|---|---|---|
| A | 2 | 0.9766676 | 1.0000000 |
| A | 5 | 0.9496968 | 1.0000000 |
| B | 2 | 0.9624290 | 0.7405622 |
| B | 5 | 0.9241750 | 0.7405622 |
| C | 2 | 0.9772796 | 1.0000000 |
| C | 5 | 0.9537293 | 1.0000000 |
| D | 2 | 0.9639901 | 0.7405622 |
| D | 5 | 0.9328086 | 0.7405622 |
1.3 Configurar cores para parallel processing
n_cores <- detectCores()
# Evaluate futures in parallel - max of two workers to avoid hogging resources
future::plan("multisession", workers = n_cores)1.4 Set some constants
seed <- 2014
primary_event <- 1
imputs <- 41.5 Importar datos
data_impA <- readRDS(here::here("Data", "Tidy", "data_impA.rds"))
imp.datosA <- complete(data_impA, action = "long", include = TRUE) |>
filter(.imp != 0) |>
filter(.imp < imputs)
imp.datosA2 <- imp.datosA 1.6 Metodo A: Reestimar riesgo basal usando Cox
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))
source(here("Code", "source", "tidy_pool.R"))
source(here("Code", "source", "process_imp_cal_plot2.R"))
source(here("Code", "source", "print_equation.R")) df_recal_metA <- import(here("Data", "Tidy", "equations", "df_recal_modA.rds"))
df_recal_metA2y <- df_recal_metA |>
filter(year == 2) |>
select(-year) |>
rename(st0_imp2y = st0_imp,
fc_coef_imp2y = fc_coef_imp)
df_recal_metA5y <- df_recal_metA |>
filter(year == 5) |>
select(-year) |>
rename(st0_imp5y = st0_imp,
fc_coef_imp5y = fc_coef_imp)
rm(df_recal_metA)
imp.datosA <- imp.datosA2 |>
left_join(df_recal_metA2y, by = ".imp") |>
left_join(df_recal_metA5y, by = ".imp") |>
mutate(eventdf = factor(eventd),
risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)),
risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |>
select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)
rm(df_recal_metA2y, df_recal_metA5y)head(imp.datosA)1.6.1 Calibration and Discrimination Measures
future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA,
horizon = 2,
primary_event = 1,
pred = "risk2y",
seed = seed,
n_cores = n_cores)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3765407 201.1 7207984 385.0 7207984 385.0
Vcells 19995951 152.6 119984267 915.5 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy",
"res_valext_kfre_stack3a4_2y_metA.rds"))
future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA,
horizon = 5,
primary_event = 1,
pred = "risk5y",
seed = seed,
n_cores = n_cores)
rio::export(results_stack3a4_5y, here("Data", "Tidy",
"res_valext_kfre_stack3a4_5y_metA.rds"))
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3771028 201.4 7207984 385.0 7207984 385.0
Vcells 20009221 152.7 95987414 732.4 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y)
res_pool1 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0253658 | NaN | NaN | NA | NaN | 0.0000000 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0273046 | 0.0254293 | 0.0291799 | 0.0000000 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 30030.000000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.0736678 | 0.0030289 | 0.1443067 | 0.0409762 | 0.0012279 | 0.0000501 | 0.0012947 | 30030 | 731.850759 | 0.0544075 | 0.0516001 | 0.0026514 | 30031 |
| OE difference | 0.0019387 | 0.0000178 | 0.0038597 | 0.0479236 | 0.0000009 | 0.0000000 | 0.0000010 | 30030 | 968.279532 | 0.0467635 | 0.0446744 | 0.0020131 | 30031 |
| Calibration Intercept | -0.4082709 | -0.5791688 | -0.2373730 | 0.0002737 | 0.0034439 | 0.0019309 | 0.0060185 | 30030 | 10.922498 | 0.7475660 | 0.4277755 | 0.1129269 | 30031 |
| Calibration Slope | 0.6270993 | 0.5449601 | 0.7092384 | 0.0000005 | 0.0005521 | 0.0004844 | 0.0011979 | 30029 | 6.876937 | 1.1699029 | 0.5391499 | 0.1479052 | 30031 |
| Logit AUC | 2.2297884 | 2.0794618 | 2.3801149 | 0.0000000 | 0.0035248 | 0.0012409 | 0.0051792 | 30030 | 19.580638 | 0.4693847 | 0.3194430 | 0.0744247 | 30031 |
| Brier | 0.0238787 | 0.0220470 | 0.0257104 | 0.0000000 | 0.0000006 | 0.0000002 | 0.0000008 | 30030 | 25.964741 | 0.3838384 | 0.2773723 | 0.0594733 | 30031 |
res_pool2 <- tidy_pool(results_stack3a4_5y)
res_pool2 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0454225 | NaN | NaN | NA | NaN | 1.00e-07 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0476187 | 0.0450853 | 0.0501521 | 0.0000000 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.0472319 | -0.0079678 | 0.1024316 | 0.0933453 | 0.0007368 | 3.91e-05 | 0.0007889 | 30030 | 449.99877 | 0.0708125 | 0.0661297 | 0.0042690 | 30031 |
| OE difference | 0.0021962 | -0.0004230 | 0.0048155 | 0.1001135 | 0.0000017 | 1.00e-07 | 0.0000018 | 30030 | 539.07942 | 0.0641981 | 0.0603254 | 0.0035782 | 30031 |
| Calibration Intercept | -0.3416117 | -0.4298735 | -0.2533500 | 0.0000000 | 0.0019609 | 4.83e-05 | 0.0020253 | 30030 | 1855.22148 | 0.0328110 | 0.0317687 | 0.0010592 | 30031 |
| Calibration Slope | 0.6214536 | 0.5681692 | 0.6747380 | 0.0000000 | 0.0003198 | 1.91e-04 | 0.0005745 | 30029 | 10.16851 | 0.7964856 | 0.4433576 | 0.1182095 | 30031 |
| Logit AUC | 2.0378663 | 1.9444443 | 2.1312882 | 0.0000000 | 0.0022485 | 1.73e-05 | 0.0022715 | 30030 | 11756.41892 | 0.0102445 | 0.0101407 | 0.0001692 | 30031 |
| Brier | 0.0385925 | 0.0364634 | 0.0407215 | 0.0000000 | 0.0000010 | 1.00e-07 | 0.0000012 | 30030 | 106.62395 | 0.1583138 | 0.1366761 | 0.0169974 | 30031 |
tab_res_2y <- res_pool1 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "2 years")
tab_res_5y <- res_pool2 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "5 years")
tab_res0 <- tab_res_2y |>
bind_rows(tab_res_5y)
tab_res0 |>
as_grouped_data(groups = "year") |>
as_grouped_data(groups = "grupo") |>
flextable::as_flextable(hide_grouplabel = TRUE) |>
autofit() |>
set_header_labels(
year = "Time horizon",
term = "Performance measure",
measures = "Method A"
) |>
bold(j = 1) |>
set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>
theme_booktabs() |>
bold(bold = TRUE, part = "header") -> table_perf_final
table_perf_final %>%
flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metA.docx"))
table_perf_finalTime horizon | Performance measure | Method A |
|---|---|---|
2 years | ||
Calibration | ||
Average predicted risk | 2.54% | |
Overall observerd risk | 2.73% (2.54% to 2.92%) | |
OE ratio | 1.08 (1 to 1.16) | |
OE difference | 0.19% (0% to 0.39%) | |
Calibration Intercept | -0.41 (-0.58 to -0.24) | |
Calibration Slope | 0.63 (0.54 to 0.71) | |
Discrimination | ||
AUC | 0.9 (0.89 to 0.92) | |
Overall performance | ||
Brier | 0.02 (0.02 to 0.03) | |
5 years | ||
Calibration | ||
Average predicted risk | 4.54% | |
Overall observerd risk | 4.76% (4.51% to 5.02%) | |
OE ratio | 1.05 (0.99 to 1.11) | |
OE difference | 0.22% (-0.04% to 0.48%) | |
Calibration Intercept | -0.34 (-0.43 to -0.25) | |
Calibration Slope | 0.62 (0.57 to 0.67) | |
Discrimination | ||
AUC | 0.88 (0.87 to 0.89) | |
Overall performance | ||
Brier | 0.04 (0.04 to 0.04) | |
rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata",
"primary_event", "horizon",
"process_imp_cal_plot", "seed", "n_cores", "kfre_pi",
"imputs")])
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 3930326 210.0 7207984 385.0 7207984 385.0
Vcells 10209937 77.9 76789932 585.9 187462211 1430.3
1.6.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards
primary_event <- 1
n_internal_knots <- 5
# Seleccion del grupo: Stages 3-4----
# A 2 años----
horizon <- 2
vdata <- imp.datosA %>%
rename(pred = risk2y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metA.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metA.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue") +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_2y
# Grafico de calibracion con curvas imputadas
plot_mod0_2y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4007596 214.1 7207984 385.0 7207984 385.0
Vcells 11142514 85.1 61431946 468.7 187462211 1430.3
# A 5 años----
horizon <- 5
vdata <- imp.datosA %>%
rename(pred = risk5y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metA.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metA.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue",
alpha = 0.5) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_5y
# Grafico de calibracion con curvas imputadas
plot_mod0_5y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y
## Grafico final
plot_mod0_2y <- plot_mod0_2y +
labs(title = "Método A\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_5y <- plot_mod0_5y +
labs(title = "Método A\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_2y <- plot_mod0_imp_2y +
labs(title = "Método A\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_5y <- plot_mod0_imp_5y +
labs(title = "Método A\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y
export(plot_mod0_2y, here("Data", "Tidy", "plot_metA_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metA_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metA.rds"))
export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metA_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metA_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metA.rds"))
ggsave(filename = "Plot_Calibration_imputed_metA.png",
device = "png",
plot = plot_cal_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
ggsave(filename = "Plot_Calibration_imputed_estabilidad_metA.png",
device = "png",
plot = plot_cal_imp_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4082740 218.1 7207984 385 7207984 385.0
Vcells 11657615 89.0 49145557 375 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metA.png"))knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metA.png"))1.7 Metodo B: Reestimar coeficientes mediante Cox
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))
source(here("Code", "source", "tidy_pool.R"))
source(here("Code", "source", "process_imp_cal_plot2.R"))
source(here("Code", "source", "print_equation.R")) df_recal_metB <- import(here("Data", "Tidy", "equations", "df_recal_modB.rds"))
df_recal_metB2y <- df_recal_metB |>
filter(year == 2) |>
select(-year) |>
rename(st0_imp2y = st0_imp,
fc_coef_imp2y = fc_coef_imp)
df_recal_metB5y <- df_recal_metB |>
filter(year == 5) |>
select(-year) |>
rename(st0_imp5y = st0_imp,
fc_coef_imp5y = fc_coef_imp)
rm(df_recal_metB)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4086638 218.3 7207984 385 7207984 385.0
Vcells 11653610 89.0 39316446 300 187462211 1430.3
imp.datosA <- imp.datosA2 |>
left_join(df_recal_metB2y, by = ".imp") |>
left_join(df_recal_metB5y, by = ".imp") |>
mutate(eventdf = factor(eventd),
risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)),
risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |>
select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)
rm(df_recal_metB2y, df_recal_metB5y)head(imp.datosA)1.7.1 Calibration and Discrimination Measures
future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA,
horizon = 2,
primary_event = 1,
pred = "risk2y",
seed = seed,
n_cores = n_cores)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4087930 218.4 7207984 385 7207984 385.0
Vcells 12017275 91.7 39316446 300 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy",
"res_valext_kfre_stack3a4_2y_metB.rds"))
future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA,
horizon = 5,
primary_event = 1,
pred = "risk5y",
seed = seed,
n_cores = n_cores)
rio::export(results_stack3a4_5y, here("Data", "Tidy",
"res_valext_kfre_stack3a4_5y_metB.rds"))
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4088044 218.4 7207984 385 7207984 385.0
Vcells 12017543 91.7 39316446 300 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y)
res_pool1 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0281935 | NaN | NaN | NA | NaN | 0.0000000 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0273046 | 0.0254293 | 0.0291799 | 0.0000000 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | -0.0320351 | -0.1007684 | 0.0366983 | 0.3609698 | 0.0012279 | 0.0000014 | 0.0012297 | 30030 | 28989.54440 | 0.0015139 | 0.0015116 | 0.0000689 | 30031 |
| OE difference | -0.0008889 | -0.0027657 | 0.0009880 | 0.3532645 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 28857.26109 | 0.0016134 | 0.0016108 | 0.0000692 | 30031 |
| Calibration Intercept | -0.0523812 | -0.1581912 | 0.0534289 | 0.3260322 | 0.0022879 | 0.0003826 | 0.0027981 | 30030 | 60.01153 | 0.2229924 | 0.1823334 | 0.0288466 | 30031 |
| Calibration Slope | 0.8493046 | 0.7625945 | 0.9360148 | 0.0000000 | 0.0010126 | 0.0004644 | 0.0016318 | 30029 | 13.88045 | 0.6114667 | 0.3794473 | 0.0960018 | 30031 |
| Logit AUC | 2.2297884 | 2.0794618 | 2.3801149 | 0.0000000 | 0.0035248 | 0.0012409 | 0.0051792 | 30030 | 19.58064 | 0.4693847 | 0.3194430 | 0.0744247 | 30031 |
| Brier | 0.0229523 | 0.0213269 | 0.0245776 | 0.0000000 | 0.0000005 | 0.0000001 | 0.0000007 | 30030 | 55.60918 | 0.2336973 | 0.1894284 | 0.0308923 | 30031 |
res_pool2 <- tidy_pool(results_stack3a4_5y)
res_pool2 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0523210 | NaN | NaN | NA | NaN | 0.00e+00 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0476187 | 0.0450853 | 0.0501521 | 0.0000000 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | -0.0941725 | -0.1473790 | -0.0409660 | 0.0005228 | 0.0007368 | 1.00e-07 | 0.0007369 | 30030 | 30010.28479 | 0.0001678 | 0.0001678 | 0.0000666 | 30031 |
| OE difference | -0.0047023 | -0.0072360 | -0.0021686 | 0.0002755 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 30003.44017 | 0.0002026 | 0.0002026 | 0.0000666 | 30031 |
| Calibration Intercept | -0.1398527 | -0.2120131 | -0.0676924 | 0.0001460 | 0.0013436 | 8.80e-06 | 0.0013553 | 30030 | 14141.47011 | 0.0086914 | 0.0086165 | 0.0001408 | 30031 |
| Calibration Slope | 0.8417300 | 0.7871257 | 0.8963344 | 0.0000000 | 0.0005867 | 1.13e-04 | 0.0007374 | 30029 | 47.77946 | 0.2568972 | 0.2043900 | 0.0353610 | 30031 |
| Logit AUC | 2.0378663 | 1.9444443 | 2.1312882 | 0.0000000 | 0.0022485 | 1.73e-05 | 0.0022715 | 30030 | 11756.41892 | 0.0102445 | 0.0101407 | 0.0001692 | 30031 |
| Brier | 0.0371864 | 0.0352595 | 0.0391134 | 0.0000000 | 0.0000009 | 1.00e-07 | 0.0000010 | 30030 | 194.08812 | 0.1125276 | 0.1011459 | 0.0096345 | 30031 |
tab_res_2y <- res_pool1 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "2 years")
tab_res_5y <- res_pool2 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "5 years")
tab_res0 <- tab_res_2y |>
bind_rows(tab_res_5y)
tab_res0 |>
as_grouped_data(groups = "year") |>
as_grouped_data(groups = "grupo") |>
flextable::as_flextable(hide_grouplabel = TRUE) |>
autofit() |>
set_header_labels(
year = "Time horizon",
term = "Performance measure",
measures = "Method B"
) |>
bold(j = 1) |>
set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>
theme_booktabs() |>
bold(bold = TRUE, part = "header") -> table_perf_final
table_perf_final %>%
flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metB.docx"))
table_perf_finalTime horizon | Performance measure | Method B |
|---|---|---|
2 years | ||
Calibration | ||
Average predicted risk | 2.82% | |
Overall observerd risk | 2.73% (2.54% to 2.92%) | |
OE ratio | 0.97 (0.9 to 1.04) | |
OE difference | -0.09% (-0.28% to 0.1%) | |
Calibration Intercept | -0.05 (-0.16 to 0.05) | |
Calibration Slope | 0.85 (0.76 to 0.94) | |
Discrimination | ||
AUC | 0.9 (0.89 to 0.92) | |
Overall performance | ||
Brier | 0.02 (0.02 to 0.02) | |
5 years | ||
Calibration | ||
Average predicted risk | 5.23% | |
Overall observerd risk | 4.76% (4.51% to 5.02%) | |
OE ratio | 0.91 (0.86 to 0.96) | |
OE difference | -0.47% (-0.72% to -0.22%) | |
Calibration Intercept | -0.14 (-0.21 to -0.07) | |
Calibration Slope | 0.84 (0.79 to 0.9) | |
Discrimination | ||
AUC | 0.88 (0.87 to 0.89) | |
Overall performance | ||
Brier | 0.04 (0.04 to 0.04) | |
rm(list=ls()[! ls() %in% c("imp.datosA","imp.datosA2", "vdata",
"primary_event", "horizon",
"process_imp_cal_plot", "seed", "n_cores", "kfre_pi",
"imputs")])
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4082843 218.1 7207984 385 7207984 385.0
Vcells 11090053 84.7 39316446 300 187462211 1430.3
1.7.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards
primary_event <- 1
n_internal_knots <- 5
# Seleccion del grupo: Stages 3-4----
# A 2 años----
horizon <- 2
vdata <- imp.datosA %>%
rename(pred = risk2y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metB.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metB.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue") +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_2y
# Grafico de calibracion con curvas imputadas
plot_mod0_2y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4082116 218.1 7207984 385 7207984 385.0
Vcells 11306583 86.3 39316446 300 187462211 1430.3
# A 5 años----
horizon <- 5
vdata <- imp.datosA %>%
rename(pred = risk5y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metB.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metB.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue",
alpha = 0.5) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_5y
# Grafico de calibracion con curvas imputadas
plot_mod0_5y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y
## Grafico final
plot_mod0_2y <- plot_mod0_2y +
labs(title = "Método B\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_5y <- plot_mod0_5y +
labs(title = "Método B\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_2y <- plot_mod0_imp_2y +
labs(title = "Método B\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_5y <- plot_mod0_imp_5y +
labs(title = "Método B\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y
export(plot_mod0_2y, here("Data", "Tidy", "plot_metB_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metB_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metB.rds"))
export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metB_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metB_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metB.rds"))
ggsave(filename = "Plot_Calibration_imputed_metB.png",
device = "png",
plot = plot_cal_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
ggsave(filename = "Plot_Calibration_imputed_estabilidad_metB.png",
device = "png",
plot = plot_cal_imp_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4086165 218.3 7207984 385.0 7207984 385.0
Vcells 11674768 89.1 39339461 300.2 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metB.png"))knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metB.png"))1.8 Metodo C: Reestimar riesgo basal usando Cause-specific Hazard Models
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))
source(here("Code", "source", "tidy_pool.R"))
source(here("Code", "source", "process_imp_cal_plot2.R"))
source(here("Code", "source", "print_equation.R")) df_recal_metC <- import(here("Data", "Tidy", "equations", "df_recal_modC.rds"))
df_recal_metC2y <- df_recal_metC |>
filter(year == 2) |>
select(-year) |>
rename(st0_imp2y = st0_imp,
fc_coef_imp2y = fc_coef_imp)
df_recal_metC5y <- df_recal_metC |>
filter(year == 5) |>
select(-year) |>
rename(st0_imp5y = st0_imp,
fc_coef_imp5y = fc_coef_imp)
rm(df_recal_metC)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4087358 218.3 7207984 385.0 7207984 385.0
Vcells 11662608 89.0 52534569 400.9 187462211 1430.3
imp.datosA <- imp.datosA2 |>
left_join(df_recal_metC2y, by = ".imp") |>
left_join(df_recal_metC5y, by = ".imp") |>
mutate(eventdf = factor(eventd),
risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)),
risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |>
select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)
rm(df_recal_metC2y, df_recal_metC5y)head(imp.datosA)1.8.1 Calibration and Discrimination Measures
future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA,
horizon = 2,
primary_event = 1,
pred = "risk2y",
seed = seed,
n_cores = n_cores)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4088314 218.4 7207984 385.0 7207984 385.0
Vcells 12024235 91.8 42027656 320.7 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy",
"res_valext_kfre_stack3a4_2y_metC.rds"))
future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA,
horizon = 5,
primary_event = 1,
pred = "risk5y",
seed = seed,
n_cores = n_cores)
rio::export(results_stack3a4_5y, here("Data", "Tidy",
"res_valext_kfre_stack3a4_5y_metC.rds"))
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4088309 218.4 7207984 385.0 7207984 385.0
Vcells 12024308 91.8 42027656 320.7 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y)
res_pool1 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0248293 | NaN | NaN | NA | NaN | 0.0000000 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0273046 | 0.0254293 | 0.0291799 | 0.0000000 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 30030.000000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.0950470 | 0.0241751 | 0.1659188 | 0.0086603 | 0.0012279 | 0.0000558 | 0.0013023 | 30030 | 599.688156 | 0.0605974 | 0.0571352 | 0.0032237 | 30031 |
| OE difference | 0.0024752 | 0.0005511 | 0.0043994 | 0.0117536 | 0.0000009 | 0.0000000 | 0.0000010 | 30030 | 859.040853 | 0.0498920 | 0.0475211 | 0.0022649 | 30031 |
| Calibration Intercept | -0.3813514 | -0.5512420 | -0.2114609 | 0.0004315 | 0.0034439 | 0.0018988 | 0.0059757 | 30030 | 11.134817 | 0.7351346 | 0.4236758 | 0.1115207 | 30031 |
| Calibration Slope | 0.6270993 | 0.5449601 | 0.7092384 | 0.0000005 | 0.0005521 | 0.0004844 | 0.0011979 | 30029 | 6.876937 | 1.1699029 | 0.5391499 | 0.1479052 | 30031 |
| Logit AUC | 2.2297884 | 2.0794618 | 2.3801149 | 0.0000000 | 0.0035248 | 0.0012409 | 0.0051792 | 30030 | 19.580638 | 0.4693847 | 0.3194430 | 0.0744247 | 30031 |
| Brier | 0.0238312 | 0.0220036 | 0.0256588 | 0.0000000 | 0.0000006 | 0.0000002 | 0.0000008 | 30030 | 26.672392 | 0.3767740 | 0.2736644 | 0.0581799 | 30031 |
res_pool2 <- tidy_pool(results_stack3a4_5y)
res_pool2 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0427432 | NaN | NaN | NA | NaN | 1.00e-07 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0476187 | 0.0450853 | 0.0501521 | 0.0000000 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.1080349 | 0.0519826 | 0.1640872 | 0.0001853 | 0.0007368 | 5.48e-05 | 0.0008098 | 30030 | 243.75412 | 0.0991142 | 0.0901764 | 0.0077398 | 30031 |
| OE difference | 0.0048756 | 0.0022347 | 0.0075164 | 0.0003234 | 0.0000017 | 1.00e-07 | 0.0000018 | 30030 | 363.80816 | 0.0795149 | 0.0736580 | 0.0052516 | 30031 |
| Calibration Intercept | -0.2568547 | -0.3447924 | -0.1689171 | 0.0000000 | 0.0019609 | 3.78e-05 | 0.0020113 | 30030 | 2870.42137 | 0.0257134 | 0.0250688 | 0.0006873 | 30031 |
| Calibration Slope | 0.6214536 | 0.5681692 | 0.6747380 | 0.0000000 | 0.0003198 | 1.91e-04 | 0.0005745 | 30029 | 10.16851 | 0.7964856 | 0.4433576 | 0.1182095 | 30031 |
| Logit AUC | 2.0378663 | 1.9444443 | 2.1312882 | 0.0000000 | 0.0022485 | 1.73e-05 | 0.0022715 | 30030 | 11756.41892 | 0.0102445 | 0.0101407 | 0.0001692 | 30031 |
| Brier | 0.0384086 | 0.0362677 | 0.0405495 | 0.0000000 | 0.0000010 | 1.00e-07 | 0.0000012 | 30030 | 100.52540 | 0.1638412 | 0.1407762 | 0.0179591 | 30031 |
tab_res_2y <- res_pool1 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "2 years")
tab_res_5y <- res_pool2 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "5 years")
tab_res0 <- tab_res_2y |>
bind_rows(tab_res_5y)
tab_res0 |>
as_grouped_data(groups = "year") |>
as_grouped_data(groups = "grupo") |>
flextable::as_flextable(hide_grouplabel = TRUE) |>
autofit() |>
set_header_labels(
year = "Time horizon",
term = "Performance measure",
measures = "Method C"
) |>
bold(j = 1) |>
set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>
theme_booktabs() |>
bold(bold = TRUE, part = "header") -> table_perf_final
table_perf_final %>%
flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metC.docx"))
table_perf_finalTime horizon | Performance measure | Method C |
|---|---|---|
2 years | ||
Calibration | ||
Average predicted risk | 2.48% | |
Overall observerd risk | 2.73% (2.54% to 2.92%) | |
OE ratio | 1.1 (1.02 to 1.18) | |
OE difference | 0.25% (0.06% to 0.44%) | |
Calibration Intercept | -0.38 (-0.55 to -0.21) | |
Calibration Slope | 0.63 (0.54 to 0.71) | |
Discrimination | ||
AUC | 0.9 (0.89 to 0.92) | |
Overall performance | ||
Brier | 0.02 (0.02 to 0.03) | |
5 years | ||
Calibration | ||
Average predicted risk | 4.27% | |
Overall observerd risk | 4.76% (4.51% to 5.02%) | |
OE ratio | 1.11 (1.05 to 1.18) | |
OE difference | 0.49% (0.22% to 0.75%) | |
Calibration Intercept | -0.26 (-0.34 to -0.17) | |
Calibration Slope | 0.62 (0.57 to 0.67) | |
Discrimination | ||
AUC | 0.88 (0.87 to 0.89) | |
Overall performance | ||
Brier | 0.04 (0.04 to 0.04) | |
rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata",
"primary_event", "horizon",
"process_imp_cal_plot", "seed", "n_cores", "kfre_pi",
"imputs")])
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4083042 218.1 7207984 385.0 7207984 385.0
Vcells 11096693 84.7 42027656 320.7 187462211 1430.3
1.8.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards
primary_event <- 1
n_internal_knots <- 5
# Seleccion del grupo: Stages 3-4----
# A 2 años----
horizon <- 2
vdata <- imp.datosA %>%
rename(pred = risk2y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metC.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metC.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue") +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_2y
# Grafico de calibracion con curvas imputadas
plot_mod0_2y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4082330 218.1 7207984 385.0 7207984 385.0
Vcells 11313246 86.4 42027656 320.7 187462211 1430.3
# A 5 años----
horizon <- 5
vdata <- imp.datosA %>%
rename(pred = risk5y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metC.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metC.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue",
alpha = 0.5) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_5y
# Grafico de calibracion con curvas imputadas
plot_mod0_5y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y
## Grafico final
plot_mod0_2y <- plot_mod0_2y +
labs(title = "Método C\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_5y <- plot_mod0_5y +
labs(title = "Método C\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_2y <- plot_mod0_imp_2y +
labs(title = "Método C\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_5y <- plot_mod0_imp_5y +
labs(title = "Método C\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y
export(plot_mod0_2y, here("Data", "Tidy", "plot_metC_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metC_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metC.rds"))
export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metC_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metC_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metC.rds"))
ggsave(filename = "Plot_Calibration_imputed_metC.png",
device = "png",
plot = plot_cal_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
ggsave(filename = "Plot_Calibration_imputed_estabilidad_metC.png",
device = "png",
plot = plot_cal_imp_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4086326 218.3 7207984 385.0 7207984 385.0
Vcells 11681344 89.2 42045987 320.8 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metC.png"))knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metC.png"))1.9 Metodo D: Reestimar coeficientes mediante Cause-specific Hazard Models
source(here("Code", "source", "kfre_pi.R"))
source(here("Code", "source", "kfre_pr.R"))
source(here("Code", "source", "oe_ratio.R"))
source(here("Code", "source", "calibration_intercept.R"))
source(here("Code", "source", "calibration_slope.R"))
source(here("Code", "source", "auc.R"))
source(here("Code", "source", "auc_brier_boot.R"))
source(here("Code", "source", "validate.mids.R"))
source(here("Code", "source", "pool.validate.mids.R"))
source(here("Code", "source", "pool.auc.mids.R"))
source(here("Code", "source", "process_imp_cal_plot.R"))
source(here("Code", "source", "predict.mira.R"))
source(here("Code", "source", "performance_measures.R"))
source(here("Code", "source", "tidy_performance_stack.R"))
source(here("Code", "source", "tidy_pool.R"))
source(here("Code", "source", "process_imp_cal_plot2.R"))
source(here("Code", "source", "print_equation.R")) df_recal_metD <- import(here("Data", "Tidy", "equations", "df_recal_modD.rds"))
df_recal_metD2y <- df_recal_metD |>
filter(year == 2) |>
select(-year) |>
rename(st0_imp2y = st0_imp,
fc_coef_imp2y = fc_coef_imp)
df_recal_metD5y <- df_recal_metD |>
filter(year == 5) |>
select(-year) |>
rename(st0_imp5y = st0_imp,
fc_coef_imp5y = fc_coef_imp)
rm(df_recal_metD)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4087558 218.3 7207984 385.0 7207984 385.0
Vcells 11669250 89.1 52542219 400.9 187462211 1430.3
imp.datosA <- imp.datosA2 |>
left_join(df_recal_metD2y, by = ".imp") |>
left_join(df_recal_metD5y, by = ".imp") |>
mutate(eventdf = factor(eventd),
risk2y = 1 - st0_imp2y ^ exp(fc_coef_imp2y * kfre_pi(imp.datosA2)),
risk5y = 1 - st0_imp5y ^ exp(fc_coef_imp5y * kfre_pi(imp.datosA2))) |>
select(.imp, .id, time, eventd, eventdf, risk2y, risk5y)
rm(df_recal_metD2y, df_recal_metD5y)head(imp.datosA)1.9.1 Calibration and Discrimination Measures
future::plan("multisession", workers = n_cores)
results_stack3a4_2y <- tidy_performance_stack(imp.datosA,
horizon = 2,
primary_event = 1,
pred = "risk2y",
seed = seed,
n_cores = n_cores)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4088458 218.4 7207984 385.0 7207984 385.0
Vcells 12030787 91.8 42033776 320.7 187462211 1430.3
rio::export(results_stack3a4_2y , here("Data", "Tidy",
"res_valext_kfre_stack3a4_2y_metD.rds"))
future::plan("multisession", workers = n_cores)
results_stack3a4_5y <- tidy_performance_stack(imp.datosA,
horizon = 5,
primary_event = 1,
pred = "risk5y",
seed = seed,
n_cores = n_cores)
rio::export(results_stack3a4_5y, here("Data", "Tidy",
"res_valext_kfre_stack3a4_5y_metD.rds"))
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4088458 218.4 7207984 385.0 7207984 385.0
Vcells 12030870 91.8 42033776 320.7 187462211 1430.3
res_pool1 <- tidy_pool(results_stack3a4_2y)
res_pool1 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0271243 | NaN | NaN | NA | NaN | 0.0000000 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0273046 | 0.0254293 | 0.0291799 | 0.0000000 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.0066260 | -0.0622337 | 0.0754858 | 0.8504016 | 0.0012279 | 0.0000048 | 0.0012342 | 30030 | 21410.19652 | 0.0051710 | 0.0051444 | 0.0000932 | 30031 |
| OE difference | 0.0001803 | -0.0016998 | 0.0020604 | 0.8509201 | 0.0000009 | 0.0000000 | 0.0000009 | 30030 | 21582.14312 | 0.0050979 | 0.0050720 | 0.0000924 | 30031 |
| Calibration Intercept | -0.0091549 | -0.1138241 | 0.0955143 | 0.8620119 | 0.0022879 | 0.0003494 | 0.0027538 | 30030 | 69.69343 | 0.2036091 | 0.1691654 | 0.0251857 | 30031 |
| Calibration Slope | 0.8493046 | 0.7625945 | 0.9360148 | 0.0000000 | 0.0010126 | 0.0004644 | 0.0016318 | 30029 | 13.88045 | 0.6114667 | 0.3794473 | 0.0960018 | 30031 |
| Logit AUC | 2.2297884 | 2.0794618 | 2.3801149 | 0.0000000 | 0.0035248 | 0.0012409 | 0.0051792 | 30030 | 19.58064 | 0.4693847 | 0.3194430 | 0.0744247 | 30031 |
| Brier | 0.0229436 | 0.0213151 | 0.0245721 | 0.0000000 | 0.0000005 | 0.0000001 | 0.0000007 | 30030 | 58.94212 | 0.2254656 | 0.1839836 | 0.0293180 | 30031 |
res_pool2 <- tidy_pool(results_stack3a4_5y)
res_pool2 |>
kbl() |>
kable_classic(full_width = T, html_font = "Cambria")| term | estimate | ll | ul | pval | ubar | b | t | dfcom | df | riv | lambda | fmi | n |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Average predicted risk | 0.0471597 | NaN | NaN | NA | NaN | 0.00e+00 | NaN | 30030 | NA | NaN | NaN | NaN | 30031 |
| Overall observerd risk | 0.0476187 | 0.0450853 | 0.0501521 | 0.0000000 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 30030.00000 | 0.0000000 | 0.0000000 | 0.0000666 | 30031 |
| Log OE ratio | 0.0096887 | -0.0437872 | 0.0631645 | 0.7224911 | 0.0007368 | 5.60e-06 | 0.0007443 | 30030 | 11824.80583 | 0.0101951 | 0.0100922 | 0.0001682 | 30031 |
| OE difference | 0.0004590 | -0.0020871 | 0.0030052 | 0.7237997 | 0.0000017 | 0.00e+00 | 0.0000017 | 30030 | 12117.43427 | 0.0099872 | 0.0098885 | 0.0001642 | 30031 |
| Calibration Intercept | -0.0152086 | -0.0877588 | 0.0573416 | 0.6811130 | 0.0013436 | 1.94e-05 | 0.0013695 | 30030 | 4702.70409 | 0.0192693 | 0.0189050 | 0.0004210 | 30031 |
| Calibration Slope | 0.8417300 | 0.7871257 | 0.8963344 | 0.0000000 | 0.0005867 | 1.13e-04 | 0.0007374 | 30029 | 47.77946 | 0.2568972 | 0.2043900 | 0.0353610 | 30031 |
| Logit AUC | 2.0378663 | 1.9444443 | 2.1312882 | 0.0000000 | 0.0022485 | 1.73e-05 | 0.0022715 | 30030 | 11756.41892 | 0.0102445 | 0.0101407 | 0.0001692 | 30031 |
| Brier | 0.0370930 | 0.0351267 | 0.0390593 | 0.0000000 | 0.0000009 | 1.00e-07 | 0.0000010 | 30030 | 168.26334 | 0.1219319 | 0.1086803 | 0.0110433 | 30031 |
tab_res_2y <- res_pool1 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "2 years")
tab_res_5y <- res_pool2 |>
select(term, estimate, ll, ul) |>
mutate(
across(c(estimate, ll, ul), ~ if_else(term == "Log OE ratio", exp(.x), .x)),
across(c(estimate, ll, ul), ~ if_else(term == "Logit AUC", exp(.x) / (1 + exp(.x)), .x)),
term = if_else(term == "Log OE ratio", "OE ratio", term),
term = if_else(term == "Logit AUC", "AUC", term),
across(c(estimate, ll, ul), ~ if_else(term %in%
c("Average predicted risk",
"Overall observerd risk",
"OE difference"), 100 * .x, .x)),
across(c(estimate, ll, ul), ~ round(.x, 2)),
measures = case_when(term == "Average predicted risk" ~ str_glue("{estimate}%"),
term %in% c("Overall observerd risk", "OE difference") ~ str_glue("{estimate}% ({ll}% to {ul}%)"),
TRUE ~ str_glue("{estimate} ({ll} to {ul})")
)
) |>
select(term, measures) |>
mutate(grupo = c(rep("Calibration", 6), "Discrimination", "Overall performance"),
year = "5 years")
tab_res0 <- tab_res_2y |>
bind_rows(tab_res_5y)
tab_res0 |>
as_grouped_data(groups = "year") |>
as_grouped_data(groups = "grupo") |>
flextable::as_flextable(hide_grouplabel = TRUE) |>
autofit() |>
set_header_labels(
year = "Time horizon",
term = "Performance measure",
measures = "Method D"
) |>
bold(j = 1) |>
set_caption("Table. Performance measures of KFRE in the external dataset of patients with CKD Stages 3a-4") |>
theme_booktabs() |>
bold(bold = TRUE, part = "header") -> table_perf_final
table_perf_final %>%
flextable::save_as_docx(path = here("Tables/Table_Imputed_Performance_metD.docx"))
table_perf_finalTime horizon | Performance measure | Method D |
|---|---|---|
2 years | ||
Calibration | ||
Average predicted risk | 2.71% | |
Overall observerd risk | 2.73% (2.54% to 2.92%) | |
OE ratio | 1.01 (0.94 to 1.08) | |
OE difference | 0.02% (-0.17% to 0.21%) | |
Calibration Intercept | -0.01 (-0.11 to 0.1) | |
Calibration Slope | 0.85 (0.76 to 0.94) | |
Discrimination | ||
AUC | 0.9 (0.89 to 0.92) | |
Overall performance | ||
Brier | 0.02 (0.02 to 0.02) | |
5 years | ||
Calibration | ||
Average predicted risk | 4.72% | |
Overall observerd risk | 4.76% (4.51% to 5.02%) | |
OE ratio | 1.01 (0.96 to 1.07) | |
OE difference | 0.05% (-0.21% to 0.3%) | |
Calibration Intercept | -0.02 (-0.09 to 0.06) | |
Calibration Slope | 0.84 (0.79 to 0.9) | |
Discrimination | ||
AUC | 0.88 (0.87 to 0.89) | |
Overall performance | ||
Brier | 0.04 (0.04 to 0.04) | |
rm(list=ls()[! ls() %in% c("imp.datosA", "imp.datosA2", "vdata",
"primary_event", "horizon",
"process_imp_cal_plot", "seed", "n_cores", "kfre_pi",
"imputs")])
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4083245 218.1 7207984 385.0 7207984 385.0
Vcells 11103353 84.8 42033776 320.7 187462211 1430.3
1.9.2 Moderate calibration: Calibration curves lowess based on subdistributional hazards
primary_event <- 1
n_internal_knots <- 5
# Seleccion del grupo: Stages 3-4----
# A 2 años----
horizon <- 2
vdata <- imp.datosA %>%
rename(pred = risk2y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_2y_metD.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_2y_metD.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue") +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_2y
# Grafico de calibracion con curvas imputadas
plot_mod0_2y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.1, color = "#38B8F7") -> plot_mod0_imp_2y
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4082519 218.1 7207984 385.0 7207984 385.0
Vcells 11319890 86.4 42033776 320.7 187462211 1430.3
# A 5 años----
horizon <- 5
vdata <- imp.datosA %>%
rename(pred = risk5y) |>
select(.imp, .id, time, eventd, pred) |>
mutate(cll_pred = log(-log(1 - pred)))
rcs_vdata <- ns(vdata$cll_pred, df = n_internal_knots + 1)
colnames(rcs_vdata) <- paste0("basisf_", colnames(rcs_vdata))
knots <- attr(rcs_vdata, "knots")
bound.knots <- attr(rcs_vdata, "Boundary.knots")
pp <- seq(min(vdata$pred), max(vdata$pred), length.out = 1000)
cll_pp <- log(-log(1 - pp))
rcs_cll_pp <- ns(cll_pp, knots = knots, Boundary.knots = bound.knots)
colnames(rcs_cll_pp) <- paste0("basisf_", colnames(rcs_cll_pp))
vdata_bis_pp <- cbind(pp, as.data.frame(rcs_cll_pp))
future::plan("multisession", workers = n_cores)
subdist_df_imp <- future_map(1:max(vdata$.imp),
process_imp_cal_plot,
vdata = vdata,
primary_event = primary_event,
horizon = horizon,
type = "subdist_hazard",
n_internal_knots = n_internal_knots,
vdata_bis_pp,
.options = furrr_options(seed = seed,
packages = c("riskRegression",
"survival",
"splines",
"cmprsk",
"tidyverse")),
.progress = TRUE)[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
[1] "basisf_1" "basisf_2" "basisf_3" "basisf_4" "basisf_5" "basisf_6"
# 5 knots seems to give somewhat equivalent graph to pseudo method with bw = 0.05
subdist_df_imp_obs <- subdist_df_imp |>
bind_rows() |>
filter(type == "observed")
subdist_df_stack <- subdist_df_imp_obs |>
group_by(.imp) |>
mutate(deciles_risk = as.integer(quantcut(risk, seq(0, 1, by = 0.1)))) |>
group_by(.imp, deciles_risk) |>
summarise(obs_mean_imp = mean(obs),
risk_mean_imp = mean(risk)) |>
group_by(deciles_risk) |>
summarise(obs_mean_pool = mean(obs_mean_imp),
risk_mean_pool = mean(risk_mean_imp))
subdist_df_imp_fix <- subdist_df_imp |>
bind_rows() |>
filter(type == "fixed") |>
arrange(risk) |>
summarise(obs_pool = mean(obs),
.by = risk) |>
mutate(deciles_risk = quantcut(risk, seq(0, 1, by = 0.1)))
rio::export(subdist_df_imp, here("Data", "Tidy", "subdist_df_imput_3a4_5y_metD.rds"))
rio::export(subdist_df_stack, here("Data", "Tidy", "subdist_df_deciles_3a4_5y_metD.rds"))
# Grafico de calibracion
ggplot() +
geom_abline(intercept = 0, slope = 1, colour = "red", linetype = 2) +
geom_line(data = subdist_df_imp_fix,
aes(x = risk, y = obs_pool),
alpha = 0.5, color = "black") +
geom_point(data = subdist_df_stack,
aes(x = risk_mean_pool, y = obs_mean_pool),
shape = 23,
stroke = 0.1,
fill = "blue",
alpha = 0.5) +
scale_y_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0, 1)) +
theme_bw() +
labs(x = "Predicted risks", y = "Observed outcome proportions") +
# geom_rug(data = subdist_df_stack,
# aes(x = risk_mean_pool, y = obs_mean_pool),
# alpha = 0.1) +
coord_fixed(ratio = 1, expand = TRUE) -> plot_mod0_5y
# Grafico de calibracion con curvas imputadas
plot_mod0_5y +
geom_line(data = subdist_df_imp_obs,
aes(x = risk, y = obs, group = .imp),
alpha = 0.3, color = "#38B8F7") -> plot_mod0_imp_5y
## Grafico final
plot_mod0_2y <- plot_mod0_2y +
labs(title = "Método D\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_5y <- plot_mod0_5y +
labs(title = "Método D\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_2y <- plot_mod0_imp_2y +
labs(title = "Método D\n(2 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_mod0_imp_5y <- plot_mod0_imp_5y +
labs(title = "Método D\n(5 year KFRE)") +
theme(plot.title = element_text(hjust = 0.5))
plot_cal_mod0 <- plot_mod0_2y / plot_mod0_5y
plot_cal_imp_mod0 <- plot_mod0_imp_2y / plot_mod0_imp_5y
export(plot_mod0_2y, here("Data", "Tidy", "plot_metD_2y.rds"))
export(plot_mod0_5y, here("Data", "Tidy", "plot_metD_5y.rds"))
export(plot_cal_mod0, here("Data", "Tidy", "plot_cal_metD.rds"))
export(plot_mod0_imp_2y, here("Data", "Tidy", "plot_metD_imp_2y.rds"))
export(plot_mod0_imp_5y, here("Data", "Tidy", "plot_metD_imp_5y.rds"))
export(plot_cal_imp_mod0, here("Data", "Tidy", "plot_cal_imp_metD.rds"))
ggsave(filename = "Plot_Calibration_imputed_metD.png",
device = "png",
plot = plot_cal_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
ggsave(filename = "Plot_Calibration_imputed_estabilidad_metD.png",
device = "png",
plot = plot_cal_imp_mod0,
path = here("Figures"),
scale = 2,
width = 12,
height = 12,
units = "cm",
dpi = 600)
gc() used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4086564 218.3 7207984 385.0 7207984 385.0
Vcells 11688070 89.2 42052652 320.9 187462211 1430.3
knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_metD.png"))knitr::include_graphics(here("Figures", "Plot_Calibration_imputed_estabilidad_metD.png"))1.10 Comparacion de modelos
plot_cal_metA <- import(here("Data", "Tidy", "plot_cal_metA.rds"))
plot_cal_metB <- import(here("Data", "Tidy", "plot_cal_metB.rds"))
plot_cal_metC <- import(here("Data", "Tidy", "plot_cal_metC.rds"))
plot_cal_metD <- import(here("Data", "Tidy", "plot_cal_metD.rds"))(plot_cal_metA | plot_cal_metB | plot_cal_metC | plot_cal_metD) + plot_annotation(tag_levels = "A") -> plot_comp_calib
ggsave(filename = "Plot_Compare_Methods.png",
plot = plot_comp_calib,
device = "png",
path = here("Figures"),
scale = 2.5,
width = 12,
height = 6,
units = "cm")1.11 Ticket de Reprocubilidad
sessionInfo()R version 4.3.3 (2024-02-29 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22631)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.utf8
[2] LC_CTYPE=English_United States.utf8
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.utf8
time zone: America/Lima
tzcode source: internal
attached base packages:
[1] parallel grid splines stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] smplot2_0.1.0 impstep_0.1.0
[3] yardstick_1.3.1 workflowsets_1.1.0
[5] workflows_1.1.4 tune_1.2.0
[7] recipes_1.0.10 parsnip_1.2.1
[9] modeldata_1.3.0 infer_1.0.7
[11] dials_1.2.1 tidymodels_1.2.0
[13] rio_1.0.1 tictoc_1.2.1
[15] ggmice_0.1.0 furrr_0.3.1
[17] future_1.33.2 ggExtra_0.10.1
[19] gtools_3.9.5 DescTools_0.99.54
[21] naniar_1.1.0 mice_3.16.0
[23] PerformanceAnalytics_2.0.4 xts_0.13.2
[25] zoo_1.8-12 VIM_6.2.2
[27] colorspace_2.1-0 janitor_2.2.0
[29] gt_0.10.1 DiagrammeR_1.0.11
[31] ggtext_0.1.2 htmltools_0.5.8
[33] devtools_2.4.5 usethis_2.2.3
[35] gghalves_0.1.4 xml2_1.3.6
[37] downlit_0.4.3 broom_1.0.5
[39] dcurves_0.4.0 glue_1.7.0
[41] labelled_2.12.0 scales_1.3.0
[43] cowplot_1.1.3 ggsci_3.0.3
[45] patchwork_1.2.0 webshot_0.5.5
[47] gridExtra_2.3 rsample_1.2.1
[49] lubridate_1.9.3 forcats_1.0.0
[51] stringr_1.5.1 dplyr_1.1.4
[53] purrr_1.0.2 readr_2.1.5
[55] tidyr_1.3.1 tibble_3.2.1
[57] tidyverse_2.0.0 boot_1.3-30
[59] gtsummary_1.7.2 flextable_0.9.5
[61] kableExtra_1.4.0 knitr_1.45
[63] plotrix_3.8-4 pec_2023.04.12
[65] prodlim_2023.08.28 pseudo_1.4.3
[67] geepack_1.3.10 KMsurv_0.1-5
[69] mstate_0.3.2 riskRegression_2023.12.21
[71] cmprsk_2.2-11 rms_6.8-0
[73] Hmisc_5.1-2 survminer_0.4.9
[75] ggpubr_0.6.0 ggplot2_3.5.0
[77] survival_3.5-8 skimr_2.1.5
[79] here_1.0.1
loaded via a namespace (and not attached):
[1] R.methodsS3_1.8.2 gld_2.6.6 pacman_0.5.1
[4] urlchecker_1.0.1 nnet_7.3-19 TH.data_1.1-2
[7] vctrs_0.6.5 png_0.1-8 digest_0.6.35
[10] shape_1.4.6.1 proxy_0.4-27 Exact_3.2
[13] httpcode_0.3.0 parallelly_1.37.1 MASS_7.3-60.0.1
[16] fontLiberation_0.1.0 httpuv_1.6.15 foreach_1.5.2
[19] withr_3.0.0 xfun_0.43 ellipsis_0.3.2
[22] memoise_2.0.1 crul_1.4.0 MatrixModels_0.5-3
[25] profvis_0.3.8 systemfonts_1.0.6 ragg_1.3.0
[28] R.oo_1.26.0 DEoptimR_1.1-3 Formula_1.2-5
[31] promises_1.2.1 httr_1.4.7 rstatix_0.7.2
[34] globals_0.16.3 rstudioapi_0.16.0 pan_1.9
[37] miniUI_0.1.1.1 generics_0.1.3 base64enc_0.1-3
[40] curl_5.2.1 repr_1.1.7 quadprog_1.5-8
[43] xtable_1.8-4 evaluate_0.23 hms_1.1.3
[46] glmnet_4.1-8 visNetwork_2.1.2 readxl_1.4.3
[49] magrittr_2.0.3 lmtest_0.9-40 snakecase_0.11.1
[52] later_1.3.2 lattice_0.22-5 future.apply_1.11.2
[55] robustbase_0.99-2 SparseM_1.81 lhs_1.1.6
[58] class_7.3-22 pillar_1.9.0 nlme_3.1-164
[61] iterators_1.0.14 GPfit_1.0-8 compiler_4.3.3
[64] stringi_1.8.3 gower_1.0.1 jomo_2.7-6
[67] minqa_1.2.6 crayon_1.5.2 abind_1.4-5
[70] haven_2.5.4 sp_2.1-3 rootSolve_1.8.2.4
[73] sandwich_3.1-0 codetools_0.2-19 multcomp_1.4-25
[76] textshaping_0.3.7 openssl_2.1.1 e1071_1.7-14
[79] lmom_3.0 mime_0.12 Rcpp_1.0.12
[82] quantreg_5.97 DiceDesign_1.10 cellranger_1.1.0
[85] gridtext_0.1.5 utf8_1.2.4 lme4_1.1-35.2
[88] fs_1.6.3 listenv_0.9.1 checkmate_2.3.1
[91] pkgbuild_1.4.4 expm_0.999-9 ggsignif_0.6.4
[94] Matrix_1.6-5 tzdb_0.4.0 visdat_0.6.0
[97] svglite_2.1.3 pkgconfig_2.0.3 tools_4.3.3
[100] cachem_1.0.8 viridisLite_0.4.2 numDeriv_2016.8-1.1
[103] fastmap_1.1.1 rmarkdown_2.26 sdamr_0.2.0
[106] mets_1.3.4 officer_0.6.5 carData_3.0-5
[109] rpart_4.1.23 farver_2.1.1 yaml_2.3.8
[112] foreign_0.8-86 cli_3.6.2 lifecycle_1.0.4
[115] askpass_1.2.0 mvtnorm_1.2-4 lava_1.8.0
[118] sessioninfo_1.2.2 backports_1.4.1 timechange_0.3.0
[121] gtable_0.3.4 jsonlite_1.8.8 mitml_0.4-5
[124] pwr_1.3-0 zip_2.3.1 ranger_0.16.0
[127] highr_0.10 polspline_1.1.24 survMisc_0.5.6
[130] R.utils_2.12.3 timeDate_4032.109 shiny_1.8.1
[133] gfonts_0.2.0 timereg_2.0.5 broom.helpers_1.14.0
[136] gdtools_0.3.7 rprojroot_2.0.4 R6_2.5.1
[139] km.ci_0.5-6 vcd_1.4-12 cluster_2.1.6
[142] pkgload_1.3.4 ipred_0.9-14 nloptr_2.0.3
[145] tidyselect_1.2.1 htmlTable_2.4.2 fontBitstreamVera_0.1.1
[148] car_3.1-2 munsell_0.5.0 laeken_0.5.3
[151] fontquiver_0.2.1 data.table_1.15.4 htmlwidgets_1.6.4
[154] RColorBrewer_1.1-3 rlang_1.1.3 uuid_1.2-0
[157] remotes_2.5.0 fansi_1.0.6 hardhat_1.3.1